In [2]:
    
import sqlalchemy
import pandas as pd
import re
    
Read the mapping from SQL
In [3]:
    
e = sqlalchemy.create_engine('mysql://root:root@127.0.0.1:3306/UMLS_ext_mappings')
    
In [66]:
    
df0 = pd.read_sql("select CTV3_CONCEPTID, V2_CONCEPTID from RCD_V3_to_V2", e)\
        .drop_duplicates()\
        .sort_index(by='CTV3_CONCEPTID')
df0.describe()
    
    Out[66]:
Number of entries in the mapping without Read2 code
In [67]:
    
df0[df0.V2_CONCEPTID.str.contains('_') == True].V2_CONCEPTID.value_counts()
    
    Out[67]:
In [68]:
    
df = df0[df0.V2_CONCEPTID.str.contains('_') == False]
    
Number of occurrences of READ3 codes in mapping
In [70]:
    
occurrences = df.CTV3_CONCEPTID.value_counts().value_counts()
DataFrame(data={'codes': occurrences.values, 'occurrences': occurrences.index}, index=['',''])
    
    Out[70]:
In [71]:
    
# Fever
code3s = ["R0062", "X76Df", "X76Di", "X76EI", "XM0yv"]
df[df.CTV3_CONCEPTID.isin(code3s)]
    
    Out[71]:
In [ ]: